# GOV 2001 - Final Project - Leah Stokes & Chase Foster
# Replicating RPS paper
# APRIL 30 2013

#CHASE WORKING DIRECTORY
#setwd("~/Documents/MIT/Spring 2013/GOV2001/Project")
getwd()


#LEAH WORKING DIRECTORY
setwd("~/Documents/MIT/Spring 2013/GOV2001/Project")
getwd()

library(foreign)
library(Zelig)
library(ZeligGAM)
library(dataframes2xls) 
library(xtable)
library(memisc) #this has mtable
library(MNP) #this will make a multinomial probit model
library(stats)
library(ZeligChoice) # this is also for the MNP

#install.packages("~/Documents/MIT/Spring 2013/GOV2001/Project/Zelig_3.5.4.tar.gz", repos = NULL, type="source")



##########

# REPLICATION OF LYON & YIN - MODEL 1

a <- read.dta("RPSDATA5.dta")


summary(a) #panel data: state by year 1994-2007
head(a)

summary(a$year)
summary(a$stateabbre)

#NEED TO SUBSET THE DATA - Colorado, DC, Iowa, Nebraska, Washington
#Iowa is already not in the dataset - because it passed its RPS in 1983
#Nebraska = non partisan
#DC doesn't have that much data
#Colorado & Washington came out of direct democracy
a <- a[-which(a$stateabbre=="CO"),]
a <- a[-which(a$stateabbre=="DC"),]
a <- a[-which(a$stateabbre=="NE"),]
a <- a[-which(a$stateabbre=="WA"),]

length(unique(a$stateabbre)) # now we have 46 observations

# Need to export the dataset to clean up some of the NAs 
#dataframes2xls::write.xls(a, "stateRPS2.xls")

f <- read.csv("stateRPS2.csv") #this is the second version

##TABLE 3 REPLICATION###
###SUMMARY STATISTICS###
#FOR THEIR SUBSET IN THE PAPER#

#Continuous variables
#Non-attainment index in year t-1 = NAindexNDlag
mean(a$NAindexalllag) #0.44
sd(a$NAindexalllag) #0.53 
min(a$NAindexalllag) #0
round(max(a$NAindexalllag), 2) #2.68 

#Emissions from electricity generation - units are correct for other version
round(mean(a$GHGLagRescale), 2) #48.13
round(sd(a$GHGLagRescale), 2) #40.89
round(min(a$GHGLagRescale), 2) #0.01
round(max(a$GHGLagRescale), 2) #247.93

#Unemployment rate
round(mean(a$unemploylag), 2) #5.05
round(sd(a$unemploylag), 2) #1.24
round(min(a$unemploylag), 2) #2.3
round(max(a$unemploylag), 2) #10

#Percentage renewable energy capacity lagged #RenCAYPerlag 
round(mean(a$RenCAYPerlag), 3) #2.07
round(sd(a$RenCAYPerlag), 2) #3.18
round(min(a$RenCAYPerlag), 2) #0
round(max(a$RenCAYPerlag), 2) #37.14

#Oil & Natural Gas Industry Influence - logged - lnOIlNGemper1000
round(mean(a$lnOIlNGemper1000), 3) # 0.32
round(sd(a$lnOIlNGemper1000), 3) # 0.52
round(min(a$lnOIlNGemper1000), 2) #0
round(max(a$lnOIlNGemper1000), 2) #1.96

#Coal - lnCoalemploy1000
round(mean(a$lnCoalemploy1000), 2) #0.30
round(sd(a$lnCoalemploy1000), 2) #0.60
round(min(a$lnCoalemploy1000), 2) #0
round(max(a$lnCoalemploy1000), 2) #2.45

#Farms - PerFarmLandhalf
round(mean(a$PerFarmLandhalf), 2) #0.34
round(sd(a$PerFarmLandhalf), 2) #0.34
round(min(a$PerFarmLandhalf), 2) #0
round(max(a$PerFarmLandhalf), 2) #1

#LCVlag
round(mean(a$LCVlag, na.rm=T), 2) #40.82
round(sd(a$LCVlag, na.rm=T), 2) #26.54
round(min(a$LCVlag, na.rm=T), 2) #0
round(max(a$LCVlag, na.rm=T), 2) #97.5

#Democrats in State Leg - DemPerReS
round(mean(a$DemPerReS, na.rm=T), 2) #50.25
round(sd(a$DemPerReS, na.rm=T), 2) #15.79
round(min(a$DemPerReS, na.rm=T), 2) #11.43
round(max(a$DemPerReS, na.rm=T), 2) #88.16

#Percentage of Natural Gas Generation - NGPerlag
round(mean(a$NGPerlag), 2) #0.13
round(sd(a$NGPerlag), 2) #0.20
round(min(a$NGPerlag), 2) #0
round(max(a$NGPerlag), 2) #0.98

#State electricity price in year t-1 - Avpricelag
round(mean(a$Avepricelag), 2) #6.97
round(sd(a$Avepricelag), 2) #2.09
round(min(a$Avepricelag), 2) #3.87
round(max(a$Avepricelag), 2) #14.47

#Median income lag - incomelag
round(mean(a$incomelag), 2) #54.77
round(sd(a$incomelag), 2) #9.83
round(min(a$incomelag), 2) #32.59
round(max(a$incomelag), 2) #87.4

#Biomass potential million tonnes - tbinmillion
round(mean(a$tbinmillion), 2) #7.93
round(sd(a$tbinmillion), 2) #6.06
round(min(a$tbinmillion), 2) #0.17
round(max(a$tbinmillion), 2) #28.28

#Categorical variables
#NONE OF THE CATEGORICAL VARIABLES WORK!!!
#Staffed ASES Chapter - can't figure out which variable is right; none ==40%

# we tried running this on the FULL data set...
b <- read.dta("RPSDATA5.dta") #this is the full, non-subset data

mean(b$ases_chapter, na.rm=T) #68%
mean(b$newASES, na.rm=T) #0.30
mean(b$ases_independent_chapter, na.rm=T)
#sum(b$ases_independent_chapter, na.rm=T)/519 #39%
#sum(b$ases_chapter, na.rm=T)/519 #68%
#sum(b$ases_regional_chapter, na.rm=T)/519 #30%
#sum(b$newASES, na.rm=T)/519 #31%


# We tried running this on the subset data
#Republican Governorship
mean(b$governor) #57% with the full data - this is correct
a$gov
a$gov2 <- a$gov
a$gov2 <- as.numeric(a$gov2)
a$gov2
a$gov2 <- ifelse(a$gov=="D" | a$gov=="Dem" | a$gov=="I" | a$gov=="u", 0, 1) #Independent is coded as a Democrat
mean(a$gov2)

a$gov2==a$governor #371 and 372
a[which(a$gov2!=a$governor),] #Oregon - was Dem or Rep in 2002/2003? DEM

colnames(a)

#Restructured Electricity Market #WDS?
mean(a$restructure2) #28%

#wind1 wind2 wind3 - could also be wind potential?
mean(a$wind1) #73% = low
mean(a$wind2) #11% = medium PROBLEM
mean(a$wind3) #16% = high PROBLEM
mean(a$windpotential)

#solar1 solar2 solar3 => problematic 
mean(a$solar1) #39% = low
mean(a$solar2) #53% = medium PROBLEM
mean(a$solar3) #9% = high PROBLEM
mean(a$solar)

##SubsetDataRPSStates => to try to fix the categorical variables
##We want only the values for the most recent year of each state.
##All of the states that have not adopted an RPS have values through year 2007
##All of the states that have adopted an RPS, have RPS==1 in the year they adopted it
##We can subset the data by calling these two features
d <- subset(a, RPS > 0 | year > 2006)
dim(d)#Results in 46 rows, one per state

##Also subset the full dataset in the same way
e <- subset(b, RPS > 0 | year > 2006)
dim(e) #Results in 50 rows, one per state

#also try with our new data and the 0s corrected for ases_independent_chapter - this data was already subset
g <- subset(f, RPS > 0 | year > 2006)
dim(g) #Results in 46 rows, one per state

##We check to see if the mean values of the categorical variables is closer to this subsetted data "d" - the subset on the authors subset

#Low Wind
mean(d$wind1) #0.72
mean(e$wind1)

#Medium Wind
mean(d$wind2) #0.13
mean(e$wind2)

#High Wind
mean(d$wind3)#0.15
mean(e$wind3)

#Low Solar
mean(d$solar1)#0.39
mean(e$solar1)

#Medium Solar
mean(d$solar2)#0.48
mean(e$solar2)

#High Solar
mean(d$solar3)#0.13
mean(e$solar3)

#Restructure
mean(d$restructure2) #0.48. Result in paper is 0.54
mean(e$restructure2)

#Staffed ASES chapter
mean(d$ases_chapter) #0.73
mean(e$ases_chapter) #0.74
mean(d$newASES) #0.37
mean(e$newASES)
mean(g$ases_independent_chapter) #37
#this means that newASES is just the presence of an independent ASES chapter

     
#Republican Governor
mean(d$governor)
mean(e$governor)
     
##We get the right results for all of the states where the variables
##are the same from year to year (wind, solar) and for governor. We
##do not get the exact right results for ASES or Restructure, but the results are close.
##It may be that the value for each state is averaged first as a state and then averaged
##equally with every other state. 



###MODEL 1###
#LOGISTIC MODEL#

mod.logit1 <- zelig(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit1)
#you don't put wind1/2/3 because then it would be colinear and would break the regression; the as.factor will drop one and create a reference category

mod.logit2 <- glm(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, family = binomial(link = logit), data=a) #how to in glm
summary(mod.logit2)

# "newASES" and "restructure2" are likely some of the problematic variables which may be breaking the logistic and not allowing us to get the same results

#OUTPUT the table

#This is the personalised output
starred = getCoefTemplate()$ci.p.horizontal
starred[1,1] = "($est:#)($p:*)"
setCoefTemplate(starred=starred)

mtable(mod.logit, coef.style="starred")



### TABLE 5 FROM ORIGINAL PAPER - MNP for RPS w and w/o instate requirements ###

mod.mnprobit <- mnp(as.factor(RPS2) ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + ases_chapter + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, data=a)
summary(mod.mnprobit)


#note: this is not the same model they run... but it is related
mod.oprobit <- zelig(as.factor(RPS2) ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + ases_chapter + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, data=a, model="oprobit", cite=F)
summary(mod.oprobit)


##########
# PART II - IMPROVING THE DATA
# MODELS 2 & 3 in paper


##Correcting the Model for Bad Data###

#We run everything again using the new excel file that we created. This demonstrates that all of the relevant variables are still intact.  

a <- read.csv("RPSDataExtension.csv") #this already excludes the same states they exclude in their paper
summary(a)


##We can run the Lyon and Yin model using the .csv file, by excluding years 2008 and 2009

a <- a[-which(a$year=="2008"),]
a <- a[-which(a$year=="2009"),]

#MODEL 1 in the PAPER
mod1 <- zelig(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod1)

# some first differences from their original model
mean(a$restructure22) #0.236

x.out <- setx(mod1, restructure2 = 1) #pick a value
s.out <- sim(mod1, x=x.out)

x.low <- setx(mod1, restructure2 = 0)
x.high <- setx(mod1, restructure2 = 1)
s.out <- sim(mod1, x=x.low, x1=x.high) #this will compare the relationship between 2 things
summary(s.out)



##Try with Solar2.csv (a complete copy of the RPS5.dta file) to test to see if we get the same results - this is the same as their original datafile but with updated info
summary(mod.logit2) #mod.logit2 is the logit regression model using RPS5.dta. Note to Leah: We are getting the same results. I just checked. We may want to relabel everything to make it more clear. 

mod.logit1 <- zelig(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS + governor + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.logit1)
summary(mod.logit3)

##It looks like the data change has not altered the Lyon and Yin model 

#Now I will demonstrate the effect of the corrections that I made after comparing Lyon and Yin's data to publicly available information. 

###Updated and corrected data###

##The most egregious error they made was miscoding their political affiliation variables. All of the party affiliation and governor variables are coded one year too early. In other words they coded the governor as beginning her term the year that she was elected, rather than the year she started her term. At first I thought that this may have been a lagged variable, but in runs in the opposite direction of a lag, coding 1994 as having a governor who does not take office until 1995. This is clearly an error. Additionally, the authors miscoded a number of Southern states that have off-year elections. 

#To correct for this we replace governor with governor2 and DemPerRes with DemPerReS2. You can see the relationship in the .csv file. We leave everything else the same. 

mod.logit2 <- zelig(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS2 + governor2 + NGPerlag + restructure2 + Avepricelag + incomelag + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit2)

#Running the model with these changes increases the significance of the Democratic legislature, income, and newASES variables.

##Another major error was the use of nominal income rather than real income. Without factoring in inflation, nominal income tells us very little. At best, it may still allow us to meaningfully compare the influence income across states, but at worst, it screws up all the results.  With the passage of RPS laws skewing toward the end of the time series, and with nominal income rates increasing much more over time than real income, the Lyon and Yin model should exaggerate the impact of income on RPS passage.  This is particularly puzzling because the authors DO use inflation-adjusted price measures for their average electricity price. 

##This model replaces incomelag with a real income measure in 2011 dollars, from a database on the US Census webpage. 

summary(b)

model.logit3 <- zelig(RPS ~ NAindexalllag + GHGLagRescale + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS2 + governor2 + NGPerlag + restructure2 + Avepricelag + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(model.logit3)


##With this control all of the significance of income goes away. It looks like we're now in the worst case scenario. Controlling for income takes away most of Lyon and Yin's findings. !!

##Another problem was with GHGLagRescale, a variable that tells us the total emissions produced by a state. The theory is that states that produce more emissions may be more likely to pass an RPS. The problem with this approach is that it is not comparison-worthy between states because different sized states will have different levels of emissions even if they are equally efficient. Perhaps the authors have another theory for including the raw emissions (i.e. states with bigger sheer totals of emissinos will feel more of a burden to enact climate action?). In any case it is undertheorized. Better indicators would be per capita emissions or emissions intensity of GDP. 
##We devised a new variable, GHGLag2PerCapita, which captures this. It is divided by a static population number for each state, which can easily be updated with year adjusted population.  
##We also tried to create a measure of GHG intensity of the economy.

mod.logit4a <- zelig(RPS ~ NAindexalllag + GHGLag2PerCapita + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS2 + governor2 + NGPerlag + restructure2 + Avepricelag + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit4a)



##The per capita figure changes things a bit, notably increasing the significance and the intercept of Democratic percentage. However, the basic results remain the same. 

#below is for GHG intensity of the economy - this model basically makese the GHG importance go to zero... both substantively and statistically

mod.logit4b <- zelig(RPS ~ NAindexalllag + GHGlagperRealGDP + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS2 + governor2 + NGPerlag + restructure2 + Avepricelag + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit4b)



##Finally, the last important correction was to their restructure2 variable. The authors had miscoded several of the states, particularly states that began to restructure and then suspended restructuring. This was likely just  a careless error, since the laws were often confusing and required some fine reading to actually identify what had happened in each place. For states that suspended restructuring, the authors dataset codes the state for every year after the initial law as restructured. This new coding is restructure22 and indicates the years after restructuring was suspended with a zero (e.g. in AR).

a[which(a$stateabbre=="AR"),] #shows the new coding for AR

mod.logit5 <- zelig(RPS ~ NAindexalllag + GHGlagperRealGDP + unemploylag + RenCAYPerlag + newASES + lnOIlNGemper1000 + lnCoalemploy1000 + PerFarmLandhalf + LCVlag + DemPerReS2 + governor2 + NGPerlag + restructure22 + Avepricelag + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit5)
summary(mod.logit4)

#The main effect of this change seems to be reducing the significance of restructuring. Since the standard error is about the same, it's more interesting that the point estimate went down. We could interpret the substantive significance change as well by taking the first differences.

##After looking through all of the data, we made some other small updates and corrections, here and there which are included below. The variables that went unchanged are: windpotential, solar, and tbinmillion. 
#I also did not make many changes to most of the other variables, but I include new and old versions just to be safe.

#NOT SURE IF WE SHOULD DO PER CAPITA OR CARBON INTENSITY OF THE ECONOMY - GHGlagperRealGDP

#rescale the GHG intensity to ease interpretation - will need to figure out the units later
a$GHGlagperRealGDP2 <- a$GHGlagperRealGDP / 1000

## MODEL 2 IN PAPER:

mod.logit6 <- zelig(RPS ~ NAindexalllag2 + GHGlagperRealGDP2 + unemploylag2 + RenCAYPerlag2 + newASES2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit6) # model 2 in paper
summary(mod.logit3) # model 1 in paper - original model




##As you can see Model 6 does not change much from Model 2.

##Take out New ASES##
##ASES chapters probably are susceptible to post-treatment bias. This should be reason enough to take it out. After spending some time looking online I discovered that there really is no data on this variable (none is referenced in the paper and nothing other than a website comes up online). The one website I found that corresponds with their indicators is a list that does not provide any information about change over time. The authors code every year as 1 or 0, no matter when the chapter came into formation or was suspended. A number of the states now listed are different from the authors' coding. For these two reasons, we removed newASES.

#MODEL 3 IN THE PAPER
mod.logit7 <- zelig(RPS ~ NAindexalllag2 + GHGlagperRealGDP2 + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit7)

##When we take out this variable, all of the variables become insignificant except for restructure22 and unemploylag2. Restructure becomes the most important predictor, which makes sense since most of the early RPS laws were enacted around the same time as state restructuring of electricity markets. However, it also doesn't tell us much about regulatory theory, just that restructuing presented an opportunity to achieve green energy goals. Maybe this is important, but we need to expand on it. Essentially I think this takes away all of the authors' results, which will be a good basis for the first half of our paper
#Also, unemployment becomes more important, 

##Is there a chance that state restructuring also has post-treatment bias? Probably not since it was likely that RPS advocates jumped on board the Restructuring train rather than the other way around. The forces that pushed for restructuring were far stronger than the forces that pushed for an RPS. 


##Note to Leah: We want to do First Differences for the original Lyon and Yin model (mod.logit3), the corrected data model (mod.logit6), and the corrected data + removed newASES (mod.logit7)


#WITHOUT RESTRUCTURED ELECTRICITY SYSTEM
mod.logit8 <- zelig(RPS ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit8)

#Now we have four significant variables: higher unemployment decreases the likelihood, and higher energy prices and Democratic legislators increase the likelihood, and wind potential 2 is a predictor (WEIRD). 
#Since higher energy prices are also likely a cause of restructuring, it is hard to view this as an independent causal force. This indicates the need to keep restructure22 in as a control variable.

#Now let's put restructure22 back in, but remove Democratic party and governor, since these variables mean different things across states and years. It would be great to find an ideology metric for state legislatures, but those don't exist. 

#WITHOUT POLITICAL VARIABLES - Govenor and Legislature
mod.logit9 <- zelig(RPS ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit9)

##Suddenly LCVlag2 matters. Although the LCV score is problematic in its own way, it does represent a more consistent indicator of ideology than the party affiliation variables. Now it seems to matter, but how much. Note to 

#SUBSTANTIVE INTERPRETATION OF LCV lag...

#Unemployment also seems quite strong, which is consistent with our intuitions that states do not pass what are viewed as costly RPS standards on business when the economy is weak. Average price also is somewhat significant. I think that we may really be onto something here with average price. I'll check to see how influential average price is in the expanded model. 

##One alternative approach would be to just exclude the former members of the Confederacy from the dataset. These states have a particular history and most of them will never consider enacting something like an RPS, no matter what internal factors are going on (North Carolina being a strange exception which I can explain). Excluding Southern states would also make it more plausible to use the party affiliation variables. 
a <- read.csv("RPSDataExtension.csv")

a <- a[-which(a$state=="NC"),]
a <- a[-which(a$state=="VA"),]
a <- a[-which(a$state=="TN"),]
a <- a[-which(a$state=="GA"),]
a <- a[-which(a$state=="MS"),]
a <- a[-which(a$state=="AL"),]
a <- a[-which(a$state=="SC"),]
a <- a[-which(a$state=="LA"),]
a <- a[-which(a$state=="AR"),]
a <- a[-which(a$state=="TX"),]
a <- a[-which(a$state=="FL"),]

#Let's try Model3 with just the Confederacy states
mod.logit3 <- zelig(RPS ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit3)
##The big difference is that the natural gas industry seems to matter more. Note to Leah: We should discuss this question at some point.




#####New Model######
## We expand the model two more years to capture new RPS's enacted in Kansas, Ohio, and Michigan. I also take out the states that merely have voluntary RPS goals. My reading is that goals are quite different from standards. Many environmental groups do not think that goals alone are very meaningful, and some have even opposed them. 
##Lyon and Yin conflate the two, without justifying why. If we were to use goals and standards then 37 states would qualify. If we just use standards than only 28 states and the District of Columbia count. 
#Regardless we should check out RPS3 -- a project that I have yet to undertake. 
 
##For the new model we need to add back in 2008 and 2009...  we do this by re-loading the data

b <- read.csv("RPSDataExtension.csv")
summary(b)

##We also need to exclude Missouri because its RPS, which was passed in 2008, was enacted by ballot initative. Since we excluded WA and CO for this reason we should also exclude MO. Note that Missouri is included in the Lyon and Yin model because it enacted a goal in 2007 

b <- b[-which(b$state=="MO"),]

#Now let's run the Lyon and Yin model again, this time RPS2, rather than RPS because we're only including standards and we're including the two additional years. 
##9 states and the District of Columbia have enacted mandatory standards. Following Lyon and Yin we exclude Iowa because it was passed early in 1991, CO, WA, and MO because they were enacted by ballot initiatve, and DC because it doesn't have a legisalture
##Nebraska, which is not an RPS state, is also excluded because they have a non-partisan legislature.  

mod.logit4 <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.logit4)
#The results don't seem to change much: still only average price and restructure22 are significant, with some slightly less significant positive correlation wtih Democratic percentage, and solar and windpotential. I think we're onto something with price.


##Let's try it with RPS3 (which includes both goals and standards in the expanded model) to see if it makes a difference.
mod.logit4 <- zelig(RPS3 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.logit4)
#We get much more significant results on unemployment, average price, wind potential and solar

##Let's try excluding the party affiliation variables, since they are likely to be even more problematic now that we've included data from the even more polarized Obama era
mod.logit5 <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.logit5)
#It doesn't seem to change much from Model 4, except now LCVlag2 is more significant, perhaps reflecting the bi-directional relationship between LCVlag2 and the party affiliation variables. Solar potential also seems to matter a lot more. 


##Now let's do the same thing with RPS3 (goals and standards) rather than RPS2 (just standards).
mod.logit5 <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.logit5)


##########
#PAPER EXTENSION

### OUR FINAL MODELS - 4 & 5


# RPS 3 == STANDARDS & GOALS (more similar to Lyon and Yin)
# as energy prices go up, more states pass an RPS... because then these are stable energy prices without fuel 
# also this might be related to restructuring

##One of the biggest problems with this paper is its conflation of political ideology with political affiliation as well as its assumption that the meaning of political affiliation is the same over time when it comes to environmental issues. We know that this is false on both accounts. Going back to Model 4, what would happen if we analyzed only the data from 1994 to 2003. Since the Republican party has become more opposed to environmental legislation over time, we expect that this will make Democratic percentage/ R governorship less important for RPS passage. My theory is that there are different determinants of RPS passage during the 1990's than in the 2000's. I also think that over the 15 year time span the meaning of Democrat changes. Although this is not the best way to take this into account (the better way would be to control for these changes more directly), I think that this strategy begins to get at the problem. 


##########

# MODEL 4 in our paper
#RPS ONLY predictions in 1990s RPS2 ~ (time==1990s)

b <- read.csv("RPSDataExtension.csv")
 

b <- b[-which(b$year=="2003"),]
b <- b[-which(b$year=="2004"),]
b <- b[-which(b$year=="2005"),]
b <- b[-which(b$year=="2006"),]
b <- b[-which(b$year=="2007"),]
b <- b[-which(b$year=="2008"),]
b <- b[-which(b$year=="2009"),]

length(unique(b$state)) #we have 46 states in the dataset now - this is the same as the full dataset

#Exclude MO since they did not enact RPS laws through their legislature.
b <- b[-which(b$state=="MO"),]

b$GHGlagperRealGDP2 <- b$GHGlagperRealGDP / 1000

length(unique(b$state)) #we have 45 states in the dataset now 


#MODEL 4 IN PAPER:
mod4 <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP2 + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + windmed + windlarge + solarmed + solarlarge + tbinmillion, model = "logit", data = b)
summary(mod4)

#Linear version of Model 4 in paper
mod4lm <- lm(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP2 + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + windmed + windlarge + solarmed + solarlarge + tbinmillion, data = b)
summary(mod4lm)

# first differences of model 4

x.out <- setx(mod4, governor2 = 1) #pick a low and high value, or just one
s.out <- sim(mod4, x=x.out) 
summary(s.out)
plot(s.out)

x.low <- setx(mod4, DemPerReS2 = 0.30)
x.high <- setx(mod4, DemPerReS2 = 0.80)
s.out <- sim(mod4, x=x.low, x1=x.high)
summary(s.out)

#going from a republican gov to dem gov = E(Y|gov=1) - E(Y|gov=0)
x.out <- setx(mod4, governor2 = 1) #rep
s.out <- sim(mod4, x=x.out) #E(Y|X) = 0.005
summary(s.out)

x.out <- setx(mod4, governor2 = 0) #dem
s.out <- sim(mod4, x=x.out) #E(Y|X) = 0.001
summary(s.out)

#first difference 0.005 - 0.001 = 0.004
# Holding all other variables at their mean, in the 1990s, you were 5 times as likely to pass an RPS in any given year as a Republican than as a Democratic governor. Although, the effect in any given year is small: having a republican governor only increases the probability of passing an RPS in any year by 0.5% during the 1990s.



##Now we have significant results!  Unemployment and coal employment are negative predictors and HAVING a Republican governor, solar potential, and higher LCV scores are positive predictors. Interestingly, there's a positive association Republican governor, perhaps reflecting the  support rather than party politics, and no association with the Democratic party. My theory for these results is that an RPS was seen during the 1990's as a pro-business and reasonable approach to addressing climate change. Republicans were also not so steadfastly opposed to environmental issues at this time. Additionally, the Southern Republican realignment had not gone into effect, weakening the connection between environmental policy and the Democratic party. 

#You get the same basic results with RPS3, demonstrating that the same factors seem to matter for RPS and RPG. 
mod.1c.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.1c.logit)
summary(mod.1.logit)

# As you can see, when you remove the Southern states for this period, the Dem legislature variable matters more, with a significance at the 10% level. 
b <- b[-which(b$state=="NC"),]
b <- b[-which(b$state=="VA"),]
b <- b[-which(b$state=="TN"),]
b <- b[-which(b$state=="GA"),]
b <- b[-which(b$state=="MS"),]
b <- b[-which(b$state=="AL"),]
b <- b[-which(b$state=="SC"),]
b <- b[-which(b$state=="LA"),]
b <- b[-which(b$state=="AR"),]
b <- b[-which(b$state=="TX"),]
b <- b[-which(b$state=="FL"),] #now we are left with 298 observations - about 50% less

length(unique(b$state)) #we have 35 states in the dataset now

#MODEL 1B: 1990s RPS with South excluded
mod.1b.logit <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.1b.logit)
summary(mod.1.logit)

#When we remove the south, it looks like the effect of a democratic legislature goes away [THOUGHTS CHASE?]; we are still seeing the effects of a republican governor. Now coal and the LCV score seem to matter more (is there no coal in the south? I'm confused)


#######################


#RPS in 2000s analysis
##Now let's just look at the data from the Bush and Obama era which has been defined by higher levels of partisan and ideological polarization, as well as more ideologically consistent political parties. 

#MODEL 5 in our paper
#RPS in 2000s

b <- read.csv("RPSDataExtension.csv")

b <- b[-which(b$year=="1994"),]
b <- b[-which(b$year=="1995"),]
b <- b[-which(b$year=="1996"),]
b <- b[-which(b$year=="1997"),]
b <- b[-which(b$year=="1998"),]
b <- b[-which(b$year=="1999"),]
b <- b[-which(b$year=="2000"),] 
b <- b[-which(b$year=="2001"),] 

#Exclude Missouri
b <- b[-which(b$state=="MO"),]

#rescale GHG
b$GHGlagperRealGDP2 <- b$GHGlagperRealGDP / 1000

mod5 <- zelig(RPS2 ~ NAindexalllag2 + GHGlagperRealGDP2 + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + windmed + windlarge + solarmed + solarlarge + tbinmillion, model = "logit", data = b)
summary(mod5)


#first differences...


#going from a republican gov to dem gov = E(Y|gov=1) - E(Y|gov=0)
x.out <- setx(mod5, governor2 = 1) #rep
s.out <- sim(mod5, x=x.out) #E(Y|X) = 0.002
summary(s.out)

x.out <- setx(mod5, governor2 = 0) #dem
s.out <- sim(mod5, x=x.out) #E(Y|X) = 0.014
summary(s.out)

#In contrast, in the 2000s, you are 7 times as likely to pass an RPS if you have a democratic governor than if you are a democratic governor. And on a yearly basis, the effect sizes are larger: in each year, a democratic governor is expected

x.low <- setx(mod5, DemPerReS2 = 0.0)
x.high <- setx(mod5, DemPerReS2 = 1.00)
s.out <- sim(mod5, x=x.low, x1=x.high)
summary(s.out)

x.out <- setx(mod5, DemPerReS2 = 0.25) #dem
s.out <- sim(mod5, x=x.out) #E(Y|X) = 0.001
summary(s.out)

x.out <- setx(mod5, DemPerReS2 = 0.75) #dem
s.out <- sim(mod5, x=x.out) #E(Y|X) = 0.001
summary(s.out)

##Now Republican governor is negatively associated with RPS adoption and Democrat percentage is predictive at the 1% level. This is the important finding. The results switch.  At the same time, special interests continue to matter, with renewable energy capacity and solar potential positively predictive, and  natual gas influence negatively predictive. More interestingly is the significance of average electricity price; during the 2000's higher prices may have pushed legislators to look for new energy solutions, opening the door for the renewable energy argument. This may also suggest that as the gap between natural gas and renewable energy prices expands, there will be less support for an RPS standard. The current glut of natural gas may be one reason that RPS standards are facing political heat right now. Also surprising is that renewable energy capacity now negatively predicts. 
#Regardless of how we do it I think that our new model should show that different factors predict RPS passage during different periods of time. The interests and people supporting RPS laws have changed over the years, making it difficult to trace in a time series that lasts 15 years (this may explain why the Lyon and Yin model tells us absolutely nothing). Equally important is the fact that we do not have a good indicator of ideology, and our already poor indicator (partisanship) is even worse because its meaning differs widely across years and states. Party affiliation doesn't predict it in the 1990's because this was a bipartisan issue and the parties were more ideologically diverse, particularly in the South. But it does predict it in the Bush and Clinton period when RPS laws were politicized in a way where Democrats supported them and Republicans opposed them. This politicization is particularly evidence today, where 16 mostly Republican led state governments are considering reversing RPS laws. (Also because of natural gas prices being so low!)











########### OTHER MODELS


##Let's try the same model with RPS3
mod.2c.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.2c.logit)
summary(mod.2.logit)

#Whoa! Now everything is significant. I don't really know what happened. 


## RPS in 2000s excluding the south
#note - Texas was already cut - passed the RPS earlier - 1999

unique(b$state)

b <- b[-which(b$state=="NC"),]
b <- b[-which(b$state=="VA"),]
b <- b[-which(b$state=="TN"),]
b <- b[-which(b$state=="GA"),]
b <- b[-which(b$state=="MS"),]
b <- b[-which(b$state=="AL"),]
b <- b[-which(b$state=="SC"),]
b <- b[-which(b$state=="LA"),]
b <- b[-which(b$state=="AR"),] 
b <- b[-which(b$state=="FL"),] #now we are left with 173 observations - about 75% less - not much data!

unique(b$state) #these are the states still in the dataset...

length(unique(b$state))  #26 states (we had 35 for the 1990s-south)

mod.2b.logit <- zelig(RPS2 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.2b.logit)
summary(mod.2.logit)

# The results are now a bit more parsimonious. 

#Let's see what happens when we look at RPS3 for the 2000's, South excluded. 

mod.2b.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.2b.logit)



#MODEL NOT REPORTED IN OUR PAPER
#I don't think that we need to find first differences for any of these. 
#1990- Present; south excluded
##Just for kicks, let's examine the entire time series again but with the South excluded.
a <- read.csv("RPSDataExtension.csv")

a <- a[-which(a$state=="NC"),]
a <- a[-which(a$state=="VA"),]
a <- a[-which(a$state=="TN"),]
a <- a[-which(a$state=="GA"),]
a <- a[-which(a$state=="MS"),]
a <- a[-which(a$state=="AL"),]
a <- a[-which(a$state=="SC"),]
a <- a[-which(a$state=="LA"),]
a <- a[-which(a$state=="AR"),]
a <- a[-which(a$state=="TX"),]
a <- a[-which(a$state=="FL"),]

#Exclude Missouri
a <- a[-which(a$state=="MO"),]

mod.logit4 <- zelig(RPS2 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = a)
summary(mod.logit4)

##The results are similar to what we found earlier, but even stronger -- (we will just report this in text, rather than writing it up in a table). Without the South, party affilitation becomes a much stronger predictor for RPS passage. This makes a lot of sense because for much of our series, states like Arkansas, Alabama, Mississippi, and Louisiana have really high percentages of Democrats, and most of these Democrats are not interested in reducing GHG emissions. In non-Southern states the association between environmental issues and Democrats is much stronger. Excluding the South may be a better way to addressing the party affiliation problem than cutting out years (which also cuts out RPS observations...  since none of the Southern states other than NC have passed an RPS, this subset cuts out fewer of our cases)


#MODEL 3 in PAPER -- I THINK THIS IS EXACTLY THE SAME AS MODEL 1 BECAUSE GOALS WERE NOT DONE UNTIL 2000s... IS THIS TRUE? IF SO, WE EXCLUDE
#RPS and RPG - 1990s

b <- read.csv("RPSDataExtension.csv")

b <- b[-which(b$year=="2004"),]
b <- b[-which(b$year=="2005"),]
b <- b[-which(b$year=="2006"),]
b <- b[-which(b$year=="2007"),]
b <- b[-which(b$year=="2008"),]
b <- b[-which(b$year=="2009"),]

b <- b[-which(b$state=="MO"),]


length(unique(b$state)) #we have 45 states in the dataset now - this is one less than the full dataset - need to exclude missouri


mod.3.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 +LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + windmed + windlarge + solarmed + solarlarge, data=b, model="logit")
summary(mod.3.logit)

#first differences

x.out <- setx(mod.3.logit, restructure22=1) #pick a low and high value, or just one
s.out <- sim(mod.3.logit, x=x.out) 
summary(s.out)
plot(s.out)



#MODEL 3B - South excluded, 1990s, RPS/RPGs

b <- b[-which(b$state=="NC"),]
b <- b[-which(b$state=="VA"),]
b <- b[-which(b$state=="TN"),]
b <- b[-which(b$state=="GA"),]
b <- b[-which(b$state=="MS"),]
b <- b[-which(b$state=="AL"),]
b <- b[-which(b$state=="SC"),]
b <- b[-which(b$state=="LA"),]
b <- b[-which(b$state=="AR"),]
b <- b[-which(b$state=="TX"),]
b <- b[-which(b$state=="FL"),] #now we are left with 298 observations - about 50% less

length(unique(b$state)) #we have 35 states in the dataset now

#MODEL 3B: 1990s RPS/RPG with South excluded
mod.3b.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.3b.logit)
summary(mod.3.logit)
summary(mod.1b.logit)


##########

#MODEL 4 in PAPER
#RPS and RPG - 2000s

b <- read.csv("RPSDataExtension.csv")
summary(b)
b <- b[-which(b$year=="1994"),]
b <- b[-which(b$year=="1995"),]
b <- b[-which(b$year=="1996"),]
b <- b[-which(b$year=="1997"),]
b <- b[-which(b$year=="1998"),]
b <- b[-which(b$year=="1999"),]
b <- b[-which(b$year=="2000"),] #QUESTION: why include with 1990s vs 2000s? Just curious what our reasons are for a cutoff -- we should probably have a theoretically justified reason...

#Exclude Missouri
b <- b[-which(b$state=="MO"),]

mod.4.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b) #outcome variable now RPS3
summary(mod.4.logit)
summary(mod.2.logit) #compare models 2 and 4 [although I'm not sure we have the base case correct for model 2 yet -- see my not in the paper]

#All the ideology variables still look the same, as does natural gas % (negative) restructure (+) and average price (+). I would say that since we've run multiple specifications and we keep seeing this, I'm feeling these are more robust findings... We might want to try running a model that ONLY models these... will ask Konstantin.


## RPS/RPG in 2000s excluding the south
#note - Texas was already cut - passed the RPS earlier - 1999
unique(b$state)

b <- b[-which(b$state=="NC"),]
b <- b[-which(b$state=="VA"),]
b <- b[-which(b$state=="TN"),]
b <- b[-which(b$state=="GA"),]
b <- b[-which(b$state=="MS"),]
b <- b[-which(b$state=="AL"),]
b <- b[-which(b$state=="SC"),]
b <- b[-which(b$state=="LA"),]
b <- b[-which(b$state=="AR"),] 
b <- b[-which(b$state=="FL"),] #now we are left with 173 observations - about 75% less - not much data!

unique(b$state) #these are the states still in the dataset...

length(unique(b$state))  #26 states (we had 35 for the 1990s-south)

mod.4b.logit <- zelig(RPS3 ~ NAindexalllag2 + GHGLag2PerCapita + unemploylag2 + RenCAYPerlag2 + lnOIlNGemper10002 + lnCoalemploy10002 + PerFarmLandhalf2 + LCVlag2 + DemPerReS2 + governor2 + NGPerlag2 + restructure22 + Avepricelag2 + incomelag2 + as.factor(windpotential) + as.factor(solar) + tbinmillion, model = "logit", data = b)
summary(mod.4b.logit)
summary(mod.4.logit)
summary(mod.2b.logit)

#this model picks up lots of weird stuff, but again, our core 5 variables are still coming up. So I think that's good.



###FIRST DIFFERENCES

mod1 <- 
x.out <- setx(mod1, educate =) #pick a low and high value, or just one
s.out <- sim() #this will compare the relationship between 2 things

s.out <- sim(z.out, x=x.low, x1=x.high)

#a regression is the same as a logit if it's equal to around 0.5 so I can do the diagnostics


